{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 首先 import 必要的模块\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV #gridsearch很多都有用\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "#%matplotlib inline\n",
    "#评价指标为logLoss\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1.读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.639947</td>\n",
       "      <td>0.866045</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.166619</td>\n",
       "      <td>0.468492</td>\n",
       "      <td>1.425995</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.205066</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.852200</td>\n",
       "      <td>-0.365061</td>\n",
       "      <td>-0.190672</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.233880</td>\n",
       "      <td>2.016662</td>\n",
       "      <td>-0.693761</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-1.332500</td>\n",
       "      <td>0.604397</td>\n",
       "      <td>-0.105584</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.073567</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.695245</td>\n",
       "      <td>-0.540642</td>\n",
       "      <td>-0.633881</td>\n",
       "      <td>-0.920763</td>\n",
       "      <td>-1.041549</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.141852</td>\n",
       "      <td>0.504422</td>\n",
       "      <td>-2.679076</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>0.316566</td>\n",
       "      <td>1.549303</td>\n",
       "      <td>5.484909</td>\n",
       "      <td>-0.020496</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0   0.639947                      0.866045       -0.031990   \n",
       "1  -0.844885                     -1.205066       -0.528319   \n",
       "2   1.233880                      2.016662       -0.693761   \n",
       "3  -0.844885                     -1.073567       -0.528319   \n",
       "4  -1.141852                      0.504422       -2.679076   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin       BMI  \\\n",
       "0                     0.670643      -0.181541  0.166619   \n",
       "1                    -0.012301      -0.181541 -0.852200   \n",
       "2                    -0.012301      -0.181541 -1.332500   \n",
       "3                    -0.695245      -0.540642 -0.633881   \n",
       "4                     0.670643       0.316566  1.549303   \n",
       "\n",
       "   Diabetes_pedigree_function       Age  Target  \n",
       "0                    0.468492  1.425995       1  \n",
       "1                   -0.365061 -0.190672       0  \n",
       "2                    0.604397 -0.105584       1  \n",
       "3                   -0.920763 -1.041549       0  \n",
       "4                    5.484909 -0.020496       1  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#input data\n",
    "train = pd.read_csv(\"C:/Users/14916/Desktop/s/FE_pima-indians-diabetes.csv\")\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>7.680000e+02</td>\n",
       "      <td>768.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>-5.551115e-17</td>\n",
       "      <td>-4.625929e-18</td>\n",
       "      <td>-1.040834e-17</td>\n",
       "      <td>-1.457168e-16</td>\n",
       "      <td>1.619075e-17</td>\n",
       "      <td>2.613650e-16</td>\n",
       "      <td>2.498002e-16</td>\n",
       "      <td>1.815677e-16</td>\n",
       "      <td>0.348958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>1.000652e+00</td>\n",
       "      <td>0.476951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-1.141852e+00</td>\n",
       "      <td>-2.552931e+00</td>\n",
       "      <td>-4.002619e+00</td>\n",
       "      <td>-2.516429e+00</td>\n",
       "      <td>-1.467353e+00</td>\n",
       "      <td>-2.074783e+00</td>\n",
       "      <td>-1.189553e+00</td>\n",
       "      <td>-1.041549e+00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-8.448851e-01</td>\n",
       "      <td>-7.201630e-01</td>\n",
       "      <td>-6.937615e-01</td>\n",
       "      <td>-4.675972e-01</td>\n",
       "      <td>-2.220849e-01</td>\n",
       "      <td>-7.212087e-01</td>\n",
       "      <td>-6.889685e-01</td>\n",
       "      <td>-7.862862e-01</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>-2.509521e-01</td>\n",
       "      <td>-1.530732e-01</td>\n",
       "      <td>-3.198993e-02</td>\n",
       "      <td>-1.230129e-02</td>\n",
       "      <td>-1.815412e-01</td>\n",
       "      <td>-2.258989e-02</td>\n",
       "      <td>-3.001282e-01</td>\n",
       "      <td>-3.608474e-01</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.399473e-01</td>\n",
       "      <td>6.112653e-01</td>\n",
       "      <td>6.297816e-01</td>\n",
       "      <td>3.291706e-01</td>\n",
       "      <td>-1.554775e-01</td>\n",
       "      <td>6.032562e-01</td>\n",
       "      <td>4.662269e-01</td>\n",
       "      <td>6.602056e-01</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3.906578e+00</td>\n",
       "      <td>2.542658e+00</td>\n",
       "      <td>4.104082e+00</td>\n",
       "      <td>7.955377e+00</td>\n",
       "      <td>8.170442e+00</td>\n",
       "      <td>5.042397e+00</td>\n",
       "      <td>5.883565e+00</td>\n",
       "      <td>4.063716e+00</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "count  7.680000e+02                  7.680000e+02    7.680000e+02   \n",
       "mean  -5.551115e-17                 -4.625929e-18   -1.040834e-17   \n",
       "std    1.000652e+00                  1.000652e+00    1.000652e+00   \n",
       "min   -1.141852e+00                 -2.552931e+00   -4.002619e+00   \n",
       "25%   -8.448851e-01                 -7.201630e-01   -6.937615e-01   \n",
       "50%   -2.509521e-01                 -1.530732e-01   -3.198993e-02   \n",
       "75%    6.399473e-01                  6.112653e-01    6.297816e-01   \n",
       "max    3.906578e+00                  2.542658e+00    4.104082e+00   \n",
       "\n",
       "       Triceps_skin_fold_thickness  serum_insulin           BMI  \\\n",
       "count                 7.680000e+02   7.680000e+02  7.680000e+02   \n",
       "mean                 -1.457168e-16   1.619075e-17  2.613650e-16   \n",
       "std                   1.000652e+00   1.000652e+00  1.000652e+00   \n",
       "min                  -2.516429e+00  -1.467353e+00 -2.074783e+00   \n",
       "25%                  -4.675972e-01  -2.220849e-01 -7.212087e-01   \n",
       "50%                  -1.230129e-02  -1.815412e-01 -2.258989e-02   \n",
       "75%                   3.291706e-01  -1.554775e-01  6.032562e-01   \n",
       "max                   7.955377e+00   8.170442e+00  5.042397e+00   \n",
       "\n",
       "       Diabetes_pedigree_function           Age      Target  \n",
       "count                7.680000e+02  7.680000e+02  768.000000  \n",
       "mean                 2.498002e-16  1.815677e-16    0.348958  \n",
       "std                  1.000652e+00  1.000652e+00    0.476951  \n",
       "min                 -1.189553e+00 -1.041549e+00    0.000000  \n",
       "25%                 -6.889685e-01 -7.862862e-01    0.000000  \n",
       "50%                 -3.001282e-01 -3.608474e-01    0.000000  \n",
       "75%                  4.662269e-01  6.602056e-01    1.000000  \n",
       "max                  5.883565e+00  4.063716e+00    1.000000  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看数值型特征的基本统计量\n",
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 768 entries, 0 to 767\n",
      "Data columns (total 9 columns):\n",
      "pregnants                       768 non-null float64\n",
      "Plasma_glucose_concentration    768 non-null float64\n",
      "blood_pressure                  768 non-null float64\n",
      "Triceps_skin_fold_thickness     768 non-null float64\n",
      "serum_insulin                   768 non-null float64\n",
      "BMI                             768 non-null float64\n",
      "Diabetes_pedigree_function      768 non-null float64\n",
      "Age                             768 non-null float64\n",
      "Target                          768 non-null int64\n",
      "dtypes: float64(8), int64(1)\n",
      "memory usage: 54.1 KB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "#把数据分成x和y  get labels\n",
    "y_train = train['Target']   \n",
    "X_train = train.drop([\"Target\"], axis=1)\n",
    "\n",
    "#保存特征名字以备后用（可视化）\n",
    "feat_names = X_train.columns   #用于特征重要性的可视化"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# #2.模型训练"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##1）默认的logistic regression  正则超参数 c=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "lr=LogisticRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of each fold is: [0.48797856 0.53011593 0.4562292  0.422546   0.48392885]\n",
      "cv logloss is 0.47615970944434044\n"
     ]
    }
   ],
   "source": [
    "#交叉验证用于评估模型性能和进行参数调优（模型选择）\n",
    "#分类中交叉验证采用分层 stratifiesKFold #不同类别的分布是一样的\n",
    "from sklearn.model_selection import cross_val_score  #交叉验证分成4份 \n",
    "loss=cross_val_score(lr,X_train,y_train,cv=5, scoring='neg_log_loss',n_jobs = 4)  #评价指标 gridsearch 越大越好 -似然损失log loss\n",
    "print('logloss of each fold is:',-loss)\n",
    "print('cv logloss is',-loss.mean())  #分别取平均"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2)用log似然损失对超参数调优-------------------正则化的Logistic regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "liblinear_lr best_score= 0.4760271305117676\n",
      "liblinear_lr best_params= {'C': 1, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "#调整超参数有—:c正则系数 一般取log后的值，正则函数L2/L1\n",
    "\n",
    "#sklearn中，调优不同的模型的参数的步骤相同\n",
    "#首先生成一个gridsearchcv的实例\n",
    "#调用GridSearchCV的 fit函数\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "#需要调优的参数 \n",
    "#try to 将L1/L2分开，并配合是何的优化算法slover\n",
    "#tuned——parameters={'penalty':{'l1','l2'},'c'={0.001,0.01,0.1,1,10,100,1000}}\n",
    "\n",
    "\n",
    "penaltys = ['l1','l2']\n",
    "Cs = [ 0.01,0.1, 1, 10, 100, 1000,10000]    #搜索范围\n",
    "\n",
    "tuned_parameters = dict(penalty = penaltys, C = Cs) #组合调优参数\n",
    "\n",
    "liblinear_lr= LogisticRegression(solver='liblinear')\n",
    "#生成gridsearch的实例，将5个参数值放进去。\n",
    "#score是找最佳分数对应的超参数，值越大 性能越好，-logloss \n",
    "grid= GridSearchCV(liblinear_lr, tuned_parameters,cv=5, scoring='neg_log_loss',n_jobs = 4,iid=True,refit=True,return_train_score='warn') #似然损失\n",
    "#grid= GridSearchCV(lr_penalty, tuned_parameters,cv=5, scoring='accuracy')#正确率\n",
    "grid.fit(X_train,y_train) #调用fit函数\n",
    "print('liblinear_lr best_score=',-grid.best_score_)\n",
    "print('liblinear_lr best_params=',grid.best_params_)\n",
    "liblinear_lr_best_estimator_grid=grid.best_estimator_ \n",
    "#solver='liblinear',cv=5, scoring='neg_log_loss'时网格搜索到的最优模型：C=1, penalty=l1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "最佳得分是当系数为1的L1正则"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.4760271305117676\n",
      "{'C': 1, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "# examine the best model\n",
    "print(-grid.best_score_)#打印模型参数\n",
    "print(grid.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#color red# log似然损失对Logistic回归模型的正则超参数调优后的结果为：\n",
    "\n",
    "最优分数是0.4760271305117676，选用的参数是C=1，并且正则项选为 L1正则。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "#绘制CV误差曲线分析模型\n",
    "# plot CV误差曲线\n",
    "test_means = grid.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid.cv_results_[ 'std_train_score' ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEKCAYAAAAFJbKyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXhc1Z3n//enSpJ3MLZlS3jBhpjFlrATFAOB0OzIggYSSIBOJ5DNSU+YdHe66cB0D+mQdDrd/ftNMnmGYQ1bh8QQEoIDXliCIaFZLIjBG4sxEIRtbGwMBi+ypO/8UVemLGSrSq5yafm8nqce1T33nFPfa1B9dc+99xxFBGZmZrlKlToAMzPrXZw4zMwsL04cZmaWFycOMzPLixOHmZnlxYnDzMzyUtTEIale0guSVkq6fDd1PitpuaRlkn6eVd4qaXHympNVPknSk5JeknSHpIpiHoOZme1KxXqOQ1IaeBE4DWgCFgEXRcTyrDqTgTuBkyPibUmjI2Jdsu+9iBjaSb93Ar+OiNmSrgWejYhrinIQZmb2IcU845gBrIyIVRHRDMwGzulQ56vA1RHxNkB70tgdSQJOBu5Kim4Fzi1o1GZmtkdlRex7LPB61nYTcHSHOocCSHoMSAP/HBHzk30DJTUCLcAPI+I3wEhgU0S0ZPU5tqtARo0aFRMnTuzucZiZ9UtPP/30WxFR2bG8mIlDnZR1HBcrAyYDJwLjgN9LqomITcCEiFgt6WDgd5KWAO/m0Gfmw6VZwCyACRMm0NjY2L2jMDPrpyS91ll5MYeqmoDxWdvjgNWd1LknInZExCvAC2QSCRGxOvm5ClgIfBR4CxguqWwPfZK0uz4i6iKirrLyQwnTzMy6qZiJYxEwObkLqgK4EJjToc5vgJMAJI0iM3S1StIBkgZklR8HLI/MlfyHgfOT9hcD9xTxGMzMrIOiJY7kOsSlwAJgBXBnRCyTdJWks5NqC4ANkpaTSQiXRcQG4AigUdKzSfkPs+7G+jbwLUkryVzz+GmxjsHMzD6saLfj9iR1dXXhaxxmBrBjxw6amprYtm1bqUPpMQYOHMi4ceMoLy/fpVzS0xFR17F+MS+Om5n1OE1NTQwbNoyJEyeSucO/f4sINmzYQFNTE5MmTcqpjaccMbN+Zdu2bYwcOdJJIyGJkSNH5nUG5sRhZv2Ok8au8v33cOIwM+vCBdc9zgXXPV6w/oYO/WA2pfr6eoYPH85ZZ53Vad1vfOMbTJ8+nSlTpjBo0CCmT5/O9OnTueuuuzqtvzvPPPMM8+fP77piDnyNYw/a/0e542vHljgSM+urLrvsMrZs2cJ1113X6f6rr74agFdffZWzzjqLxYsXd+tznnnmGZYuXUp9fX23Y23nMw4zsxI65ZRTGDZsWLfavvTSS5xxxhkcddRRnHDCCbz44osAzJ49m5qaGqZNm8ZJJ53E1q1bueqqq7j99tu7dbbSkc84zMx6qVmzZnHjjTdyyCGH8Nhjj3HppZdy//33893vfpeFCxcyZswYNm3axKBBg7jyyitZunQpP/7xj/f6c504zKzf+u5vl7F8dWdT4O1q+ZpMnVyuc0w5cD++8+dT9zq2rmzatIknnniC8847b2dZS0tm/tfjjjuOL3zhC3zmM5/h05/+dME/24ljD17b8D4tbX3/AUkz630iglGjRnV6zeOGG27gySef5N5772XatGk899xzBf1sJ449CGDD+828v72FIQP8T2XW1+R6ZtATb5Q54IADqK6u5u677+ZTn/oUbW1tLFmyhGnTprFq1SqOOeYYjj76aObMmcMbb7zBsGHD2Lx5c0E+2xfH92DE4AoiYOEL60sdipn1UZ/85Cf5zGc+w0MPPcS4ceNYsGBBzm1nz57Ntddey7Rp05g6dSr33nsvAH/7t39LbW0ttbW1nHrqqdTU1HDyySfz7LPP8tGPftQXx4tp2MAyylJi
7tI1nHlkdanDMbM+4r333tv5/ve//31ObSZOnMjSpUt3KTv44IM7TTRz5nSciBwqKysLti6Rzzj2QBIjhlTw8PPr2LajtdThmJn1CD7j6MKIIRWs27ydR15czxlTq0odjpmVQE+6ttET+IxjD+742rH89r8fzwGDy5m3ZE2pwzEz6xGcOLpQnk5x+pQqHlyxju0tHq4yMytq4pBUL+kFSSslXb6bOp+VtFzSMkk/T8qmS3o8KXtO0gVZ9W+R9IqkxclrejGPAaC+tor3trfwh5feKvZHmZn1eEW7xiEpDVwNnAY0AYskzclaAhZJk4ErgOMi4m1Jo5NdW4AvRMRLkg4Enpa0ICI2Jfsvi4i9u58sD8cdMophA8uYt3QtpxwxZl99rJlZj1TMM44ZwMqIWBURzcBs4JwOdb4KXB0RbwNExLrk54sR8VLyfjWwDqgsYqx7VFGW4rQpY7h/2VqaW9pKFYaZlcrNZ2ZeBdI+rfrixYs59thjmTp1KkceeSR33HHHh+r2t2nVxwKvZ203AUd3qHMogKTHgDTwzxGxy5FJmgFUAC9nFf+LpCuBh4DLI2J7gWP/kIaaan79zBs8vmoDf3ZoyXKYmfUhgwcP5rbbbmPy5MmsXr2ao446ijPOOIPhw4fvrNPfplXvbEmpjhM/lQGTgROBi4AbJe38F5NUDfwn8MWIaP9T/wrgcODjwAjg251+uDRLUqOkxvXr9/7J7+Mnj2JIRdp3V5lZwRx66KFMnjwZgAMPPJDRo0eTz/dVX5xWvQkYn7U9DljdSZ0nImIH8IqkF8gkkkWS9gPuA/4pIp5obxAR7d/c2yXdDPx9Zx8eEdcD1wPU1dXt9UyFA8vTnHLEGO5f/ibfP7eNsrRvSDOzwnnqqadobm7mkEMOyblNX5xWfREwWdIk4A3gQuAvOtT5DZkzjVskjSIzdLVKUgVwN3BbRPwyu4Gk6ohYo8wiuecCS9lHGmqrmPPsap56ZSOf+MioffWxZlYs8y6HtUu6rrc2mV02l+scVbUw84d5hbFmzRo+//nPc+utt5JK5fZHaZ+cVj0iWiRdCiwgc/3ipohYJukqoDEi5iT7Tpe0HGglc7fUBkl/CZwAjJR0SdLlJRGxGLhdUiWZobDFwNeLdQwd/dmhoxlUnmbu0jVOHGZWEO+++y5nnnkm3//+9znmmGNybtdnp1WPiLnA3A5lV2a9D+BbySu7zs+An+2mz5MLH2luBlWkOenwShYse5Pvnl1DOtXZZRwz6zVyPTNoP9P44n0F/fjm5mY+9alP7Tw7yIenVe9FZtZUs37zdp5+7e1Sh2Jmvdydd97Jo48+yi233LLzNtt87poq1bTqyvzR37fV1dVFoaYTfm97C0d97wEumjGBfz67+MtDmllhrVixgiOOOCK/RkU64+hJOvt3kfR0RNR1rOszjjwNHVDGCYdWMn/pWtq8rKyZ9UNOHN3QUFvF2ne3sbhpU9eVzaz3++J9ffpsI19OHN1wyhFjKE/LDwOaWb/kxNEN+w0s55OTK5m7ZC394RqRWV/j39td5fvv4cTRTfU1VbyxaStL3nin1KGYWR4GDhzIhg0bnDwSEcGGDRsYOHBgzm28dGw3nT5lDP8jJeYtXcuR44Z33cDMeoRx48bR1NSU15xQfd3AgQMZN25czvWdOLpp+OAKjj1kJPOWrOEfzjiMzAwoZtbTlZeXM2nSpFKH0at5qGovNNRW8+qGLaxYU5inMc3MegMnjr1w+pQxpATzl/ruKjPrP5w49sLIoQM4etJI5i5dW+pQzMz2GSeOvdRQW8XKde/x0pserjKz/sGJYy+dMbUKCeYu8VmHmfUPThx7afR+A6k76ADm+TqHmfUTRU0ckuolvSBppaTLd1Pns5KWS1om6edZ5RdLeil5XZxVfpSkJUmfP1EPuA92Zk01z6/dzKr175U6FDOzoita4pCUBq4GZgJTgIskTelQZzJwBXBcREwF/iYpHwF8BzgamAF8R9IB
SbNrgFlk1iafDNQX6xhyVV9TBcA8XyQ3s36gmGccM4CVEbEqIpqB2cA5Hep8Fbg6It4GiIh1SfkZwAMRsTHZ9wBQL6ka2C8iHk9WD7yNzLrjJXXg8EFMHz/cw1Vm1i8UM3GMBV7P2m5KyrIdChwq6TFJT0iq76Lt2OT9nvosiYbaKpa+8S6vb9xS6lDMzIqqmImjs2sPHWcVKyMz3HQicBFwo6The2ibS5+ZD5dmSWqU1Lgv5qSZWVMN4LMOM+vzipk4moDxWdvjgNWd1LknInZExCvAC2QSye7aNiXv99QnABFxfUTURURdZWXlXh1ILsaPGEzN2P18W66Z9XnFTByLgMmSJkmqAC4E5nSo8xvgJABJo8gMXa0CFgCnSzoguSh+OrAgItYAmyUdk9xN9QXgniIeQ15m1lSz+PVNrN60tdShmJkVTdESR0S0AJeSSQIrgDsjYpmkqySdnVRbAGyQtBx4GLgsIjZExEbge2SSzyLgqqQM4K+AG4GVwMvAvGIdQ75mJndXzffdVWbWh6k/LGZSV1cXjY2N++Sz6n/8KMMGlvHLr39in3yemVmxSHo6Iuo6lvvJ8QKbWVNN42tvs+7dbaUOxcysKJw4CqyhtooIWLDMw1Vm1jc5cRTY5DHD+Mjoob67ysz6LCeOImioqeLJVzbw1nvbSx2KmVnBOXEUQX1NNW0B9y97s9ShmJkVnBNHERxRPYyJIwf7KXIz65OcOIpAEjNrq/mvlzfw9vvNpQ7HzKygnDiKZGZNFa1twQMrPFxlZn2LE0eR1I7dn7HDB/kpcjPrc5w4ikQSDbVV/P6l9by7bUepwzEzKxgnjiKaWVvNjtbgIQ9XmVkf4sRRRNPHDadqv4F+GNDM+hQnjiJKpUR9TRWPvLie97a3lDocM7OCcOIosobaappb2nj4+XVdVzYz6wWcOIrsqIMOoHLYAD8MaGZ9hhNHkaVT4oypY3j4+fVsbW4tdThmZnutqIlDUr2kFyStlHR5J/svkbRe0uLk9ZWk/KSsssWStkk6N9l3i6RXsvZNL+YxFEJDTTVbd7TyyIserjKz3q+sWB1LSgNXA6cBTcAiSXMiYnmHqndExKXZBRHxMDA96WcEmWVi78+qcllE3FWs2AttxqQRjBhSwdwla6mvqS51OGZme6WYZxwzgJURsSoimoHZwDnd6Od8YF5EbClodPtQWTrF6VPG8NCKN9m2w8NVZta7FTNxjAVez9puSso6Ok/Sc5LukjS+k/0XAr/oUPYvSZsfSRpQoHiLamZtNe83t/KHl94qdShmZnulmIlDnZRFh+3fAhMj4kjgQeDWXTqQqoFaYEFW8RXA4cDHgRHAtzv9cGmWpEZJjevXr+/eERTQJw4Zyf6Dypnru6vMrJcrZuJoArLPIMYBq7MrRMSGiGhfJu8G4KgOfXwWuDsidmS1WRMZ24GbyQyJfUhEXB8RdRFRV1lZuZeHsvfK0ylOmzKGB5a/SXNLW6nDMTPrtmImjkXAZEmTJFWQGXKak10hOaNodzawokMfF9FhmKq9jSQB5wJLCxx30cysqWLzthYee9nDVWbWexXtrqqIaJF0KZlhpjRwU0Qsk3QV0BgRc4BvSjobaAE2Ape0t5c0kcwZyyMdur5dUiWZobDFwNeLdQyFdvzkUQwdUMb8JWs56bDRpQ7HzKxbFNHxskPfU1dXF42NjaUOA4C/mf1HFr64nkX/eCrlaT9/aWY9l6SnI6KuY7m/ufax+ppqNm3ZwZOrNpY6FDOzbnHi2MdOPKySwRVp311lZr2WE8c+NrA8zUmHj+b+ZWtpbev7w4Rm1vc4cZRAQ001b73XzKJXPVxlZr2PE0cJnHhYJQPKUsxb4uEqM+t9nDhKYMiAMk48rJL5y9bS5uEqM+tlnDhKpKG2mjff3c4fX3+71KGYmeXFiaNETj58NBXpFHOXrC11KGZmeXHiKJFhA8v55ORRzF+6lv7wEKaZ9R1OHCU0s7aaNzZt5bmmd0od
iplZzpw4Sui0I8ZQlpIfBjSzXsWJo4T2H1zOJz4yinlLPFxlZr2HE0eJNdRU8aeNW1i+5t1Sh2JmlpO8E4eklKT9ihFMf3T61CrSKTHPd1eZWS+RU+KQ9HNJ+0kaAiwHXpB0WXFD6x9GDKngmINHMHfJGg9XmVmvkOsZx5SIeJfMintzgQnA54sWVT9TX1PNqrfe58U33yt1KGZmXco1cZRLKieTOO5J1gDv8s9jSfWSXpC0UtLlney/RNJ6SYuT11ey9rVmlc/JKp8k6UlJL0m6I1mWtlc7Y+oYJJjnu6vMrBfINXFcB7wKDAEelXQQsMeruZLSwNXATGAKcJGkKZ1UvSMipievG7PKt2aVn51V/m/AjyJiMvA28OUcj6HHGj1sIB+fOMLXOcysV8gpcUTETyJibEQ0RMZrwEldNJsBrIyIVRHRDMwGztmbYCUJOBm4Kym6lcxZUK/XUFPFC29uZuU6D1eZWc+W68Xxv04ujkvSTyU9Q+YLfE/GAq9nbTclZR2dJ+k5SXdJGp9VPlBSo6QnJLUnh5HApoho6aLPXqe+phqA+R6uMrMeLtehqi8lF8dPByqBLwI/7KKNOinreF3kt8DEiDgSeJDMGUS7Ccki6X8B/FjSITn2mflwaVaSeBrXr1/fRailV7X/QD42YTjzlnq4ysx6tlwTR/sXdgNwc0Q8S+df4tmagOwziHHA6uwKEbEhIrYnmzcAR2XtW538XAUsBD4KvAUMl1S2uz6z2l8fEXURUVdZWdlFqD1DQ201y1a/y2sb3i91KGZmu5Vr4nha0v1kEscCScOAti7aLAImJ3dBVQAXAnOyK0iqzto8G1iRlB8gaUDyfhRwHLA8Mg86PAycn7S5GLgnx2Po8c6YWgXgsw4z69FyTRxfBi4HPh4RW4AKMsNVu5Vch7gUWEAmIdwZEcskXSWp/S6pb0paJulZ4JvAJUn5EUBjUv4w8MOIWJ7s+zbwLUkryVzz+GmOx9DjjR8xmCPH7e/EYWY9mnJ9Wjn5sj8h2XwkIn5btKgKrK6uLhobG0sdRk6uWfgy/zb/ef7w7ZMYd8DgUodjZv2YpKeTa827yPWuqh8Cf01mupHlZM4U/rWwIRrAzJrMcNV8n3WYWQ+V61BVA3BaRNwUETcB9cCZxQur/5o4aghHVO/n4Soz67HymR13eNb7/QsdiH2goaaKp197m7XvbCt1KGZmH5Jr4vhX4I+SbpF0K/A08IPihdW/zazN3Gy2YJnPOsys58l1ypFfAMcAv05ex0bE7GIG1p99ZPRQDh0zlLlL/BS5mfU8e0wckj7W/gKqyTzU9zpwYFJmRVJfU81Tr25k/ebtXVc2M9uHyrrY///vYV/Q9XxV1k0NtVX85KGXuH/5Wj539EGlDsfMbKc9Jo6I6GoGXCuSw8YM4+BRQ5i3xInDzHqWrs44AJD06U6K3wGWRMS6woZkAJKor6niukdX8fb7zRwwpNevV2VmfUQ+U47cCHwued0AfAt4TJKXkC2ShtpqWtuCB5a/WepQzMx2yjVxtAFHRMR5EXEemRX9tgNHk5k7yopg6oH7MX7EIOZ6jQ4z60FyTRwTIyL7z951wKERsRHYUfiweoibz8y8SkQSDTXVPLbyLd7Z0nf/mc2sd8k1cfxe0r2SLpZ0MZnp0R+VNATYVLzwrL6mih2twYMrPFxlZj1DronjG8DNwHQyCyrdCnwjIt73nVfFNX38cA7cf6DnrjKzHiOnu6oiIiT9AWgm8/zGU5HrfOy2VzJ3V1XzsydfY/O2HQwbWF7qkMysn8t1WvXPAk+RWXnvs8CTks7fcyuQVC/pBUkrJV3eyf5LJK2XtDh5fSUpny7p8WSRp+ckXZDV5hZJr2S1mZ7rwfZWM2uraG5p43fP+85nMyu9nM44gH8ks/rfOgBJlcCDwF27ayApDVwNnEZmqpJFkuZkreTX7o6IuLRD2RbgCxHxkqQDySxduyAi2q+nXBYRu/3svuaoCQcwetgA
5i1ZyznTx5Y6HDPr53K9xpHq8KDfhhzazgBWRsSqiGgGZgPn5PJhEfFiRLyUvF9N5i6uyhxj7XNSqczDgAtfXMeW5pZSh2Nm/VyuiWO+pAXJ0NIlwH3A3C7ajCUzIWK7pqSso/OS4ai7JI3vuFPSDDJrnL+cVfwvSZsfSRqQ4zH0ajNrqtm2o42FL6wvdShm1s/lOq36ZcD1wJHANOD6iOjqwT911lWH7d+SeUbkSDJDX7fu0oFUDfwn8MWIaEuKrwAOBz4OjGA3DyBKmiWpUVLj+vW9/8t2xqQRjBxS4anWzazkcl4BMCJ+FRHfioi/jYi7c2jSBGSfQYwDVnfoc0NEtM8bfgNwVPs+SfuRObP5p4h4IqvNmsjYTuYW4Rm7iff6iKiLiLrKyt4/ypVOidOnVvHw8+vYtqO11OGYWT/W1XocmyW928lrs6R3u+h7ETBZ0iRJFcCFZB4czO6/OmvzbGBFUl4B3A3cFhG/7KyNJAHnAku7Psy+oaG2ivebW3n0xd5/BmVmvVdX06oP627HEdEi6VJgAZAGboqIZZKuAhojYg7wTUlnAy3ARuCSpPlngROAkck1FYBLImIxcHtyV5eAxcDXuxtjb3PMwSMZPriceUvXcvrUqlKHY2b9VK6343ZLRMylw0X0iLgy6/0VZK5ZdGz3M+Bnu+mz3y4eVZ5OcdoRY5i/dC3bW1oZUJYudUhm1g/lfI3DeoaG2mo2b2/hv1ZuKHUoZtZPOXH0Mp/4yEiGDSzz3VVmVjJOHL3MgLI0px4xhvuXv8mO1rauG5iZFZgTRy80s6aKd7bu4PGXPVxlZvueE8eevLcONq+FHjYR8AmHVjKkIu2p1s2sJJw4dicCtrwFG1fCXV+CbV09trLvDCxPc/IRY7h/2VpaPFxlZvuYE8fuSFB5BAw/CJbfA9edAKv/WOqodppZU8WG95t56tWNpQ7FzPoZJ449kWD/8fDFudDaDDeeBk9c0yOGrk48rJKB5Snme7iq11r2g+NZ9oPjSx1GQfhYep5iHocTRy4mHANf/wN85FSYfznM/gvYUtq/9AdXlHHSYaOZt3QtbW2lT2T7Sl/5pTbrzZw4cjV4BFz0CzjjX+GlB+DaT8KfnixpSDNrq1m/eTtP/+ntksZhZv2LE0c+JDj2v8GX74d0Gdw8E37/v6CtNBeoTz58NBVlKT8MaGb7lBNHd4z9GHztUTjiz+Gh78Lt58F7+37G2qEDyjhhciXzcxiuuuC6x7ngusf3UWRm1pc5cXTXwP3hM7fAWT+CVx+Da4+DVY/s8zAaaqtY8842nm3atMd6V264jCs3XLaPojKzvsyJY29IUPcl+OrvYMB+cNs58PAPoG3fLbR0yhFjKE/LDwOa2T7jxFEIVTUwayFMuwge+Te49Wx4d3VXrQpi/0HlHPeRUcxdsoboAbcJm1nf58RRKAOGwqeugXOvgdXPwLXHZ+6+2gcaaqppensry1b3nKfbzazvKmrikFQv6QVJKyVd3sn+SyStl7Q4eX0la9/Fkl5KXhdnlR8laUnS50+SJWR7jul/AbMegaFVcPv5cP//hNYdRf3I06aMIZ2S764ys32iaIlDUhq4GpgJTAEukjSlk6p3RMT05HVj0nYE8B3gaGAG8B1JByT1rwFmAZOTV32xjqHbKg+Frz6Uuf7xXz/J3Lb79mtF+7gDhlRw7MEjmbd0rYerzKzoinnGMQNYGRGrIqIZmA2ck2PbM4AHImJjRLwNPADUS6oG9ouIxyPzDXkbcG4xgt9r5YMyd1ydfzOsfwGu+yQsn1O0j5tZW8Urb73PC29uLtpnmJlBcRPHWOD1rO2mpKyj8yQ9J+kuSeO7aDs2ed9Vnz1Hzaczz3yMOBju/DzMvQx2bCv4x5w+pYqUYO4S311lZsVVzMTR2bWHjuMovwUmRsSRwIPArV20zaXPTAfSLEmNkhrXr9/3D+ftYsQk+NL9cMw34Knr4aenwYaXC/oRlcMG8PGJI5jn6xxm
VmTFTBxNwPis7XHALveoRsSGiNiebN4AHNVF26bk/W77zOr7+oioi4i6ysrKbh9EwZRVQP0P4MJfwKY/ZaZpf+6XBf2IhtpqXlr3HivXebjKzIqnmIljETBZ0iRJFcCFwC6D/Mk1i3ZnAyuS9wuA0yUdkFwUPx1YEBFrgM2SjknupvoCcE8Rj6HwDm/IzLQ7pgZ+/RW451Jo3lKQrutrqgCY5+EqMyuioiWOiGgBLiWTBFYAd0bEMklXSTo7qfZNScskPQt8E7gkabsR+B6Z5LMIuCopA/gr4EZgJfAyMK9Yx1A0w8fDJffBJ/8O/vgzuOEkWLei63ZdGLPfQOoOOoC5forczIqorJidR8RcYG6Hsiuz3l8BXLGbtjcBN3VS3gjUFDbS3fjifcXrO10Gp1wJBx0Hd38Nrj8JGv4DPvqXmalMuqm+porv37eCV956n0mjhhQwYDOzDD85XmofOQW+/hiM/zjMuRR+/VXY3v1rFDNrM6N/85b6IrmZFYcTR08wbAx8/jdw0j/B0l9lLpyvebZbXY0dPohp44d7SVkzKxonjp4ilYY/uwwuvhd2bIUbT4Unr+/W+uYNNVU81/QOr28szEV3M7NsThw9zcTjMkNXB58I8y6DO/4Stua3NOzMmsxwlc86zKwYnDh6oiEj4aI74PTvw4vz4doT4PVFOTefMHIwUw/cz9c5zKwonDh6qlQKPvHf4UsLMs/L31wPj/3vnNc3b6it5pk/bWLNO1uLG6eZ9TtOHD3duDr42u/hsJnwwJXw88/C+2912az9YUAPV5lZoTlx9AaDhsNn/xMa/j945ZHMIlGv/mGPTQ6pHMphY4b5KXIzKzgnjt5Cghlfha88BOWD4dY/h4X/tsf1zWfWVrHotY2s21z42XjNrP9y4uhtqo+Erz0CNefDwh/AbefA5s7PKhpqq4mABcve3MdBmllf5sTRGw0YBp++Hs65Gpoa4ZrjYOWDH6o2efRQDq4c4qnWzaygnDh6Kykzr9WshTCkEn52Hjz4z7usby6JhppqnnxlI++0DSpVpGbWxzhx9HajD4ev/g4+djH84Udwy5mw6YPFE2fWVtHaFjzRcmgJgzSzvsSJoy+oGAxn/wTO+ym8uSxz19XzmUmJp4EoNG0AAAyuSURBVFTvx0EjB/OHHYeXOEgz6yucOPqS2vMz65sPnwCzL4J5l6PWZuprqni29SA2x8BSR2hmfUBR1+OwEhh5CHzlQbj/f8KT18CfHufc43/CdaT51y3nMuXXSxg6IM3gijKGDihjyIAyhgxIM6SijMED0h+UVXxQnkp1f30QM+t7ipo4JNUD/xtIAzdGxA93U+984JfAxyOiUdLngMuyqhwJfCwiFktaCFQD7XNpnB4R64p1DL1S2QBo+HeYdALc8984/J4z+bvy0/lF60k8sPxN3t/ewtYdu3/+o6NB5WmGDCjbJeEMHpCUfSjhZMo/SEzpJDl9ULc87RNds95M0Y1pu3PqWEoDLwKnAU1kloC9KCKWd6g3DLgPqAAuTVb4y95fC9wTEQcn2wuBv+9Yb0/q6uqisTHn6n3Lpj/BXV+CpkW0kiI9YBikUoTShDI/20jRplTmJyla21+RebUgWiJFS4iWEDvaUuwIsaMNdrSJ5uTV2qF9W3sfaGd5GylIpUmny0ilMz/T6TLKypJXuozy8sz78vJyypOfFeXllJeX8e6i2ZTRxugTvkxEW/IKoi35GQFtrQQBbW20RRDRBu0/24K2ZJv2ttG28z0762btT/ra2Ya2zGz3O/tJ+iDQzn6Svvigz/b37ft3bMzcxFBxwIGQ2Qtkpib70G9lgJLS6LBXu1bLlHXyex180MeHd8Yu9cixbruWzesBKBs2qvM2vciOzZkpfcp7+bG0H8dBs37OmHGHdKsPSU9HRF3H8mKeccwAVkbEqiSA2cA5wPIO9b4H/Dvw97vp5yLgF8UKss8bPgG+OI81P6ilIpoZOe18iFbU1oqiFdraSEdr
5gn07J/JFzAf2tfWSXkbEa1Eawttba2ZL/G2FqKtlWjLapd8LtGGopVUaxtqaSVNbhM37uKRvyv8v1UHbZH56mwjReZrv/2naEPELq8P6pEkyuw2KNOG7DZqIxC8++Fp83f/51znw4bRafnu6u6u6w/X77zfD/cdNGcS09Z9MalmcYdOg+0ApLb27hkX2pLjaN2xo4ua+Stm4hgLvJ613QQcnV1B0keB8RFxr6TdJY4LyCScbDdLagV+BXw/inXa1Feky9mYzvz1NLLh34vyEUpe3R6EamvbJTlFWwvNLS1s2daceW3fztbtzaz85ZU0R5rqhn8gpRSSSKVSIJFOpVAqjSSUSpFSilRKKJUmlVWmlEil0qRSIiVBKk1KKdKpdLIvtXNf5sXO90pl3qcltLM8+ZnntaBlPzgegKn/Y8/zjvUGPpaep/04DplU+Dsqi5k4Ovst2vkFLykF/Ai4ZLcdSEcDWyJiaVbx5yLijWSI61fA54HbOmk7C5gFMGHChO7Eb/tSKgWkIF0OZP7nGQAMGAoHZFVLl2emT5k640Nnz2a2jxTzKmUTMD5rexywOmt7GFADLJT0KnAMMEdS9jfChXQYpoqIN5Kfm4GfkxkS+5CIuD4i6iKirrKyci8PxczM2hUzcSwCJkuaJKmCTBKY074zIt6JiFERMTEiJgJPAGe3X/ROzkg+A8xubyOpTNKo5H05cBaQfTZiZmZFVrShqohokXQpsIDM7bg3RcQySVcBjRExZ889cALQ1H5xPTEAWJAkjTTwIHBDEcI3M7PdKOpzHBExF5jboezK3dQ9scP2QjLDV9ll7wNHFTRIMzPLi5/EMjOzvDhxmJlZXpw4zMwsL04cZmaWFycOMzPLixOHmZnlxYnDzMzy4sRhZmZ5ceIwM7O8OHGYmVlenDjMzCwvThxmZpYXJw4zM8uLE4eZmeXFicPMzPJS1MQhqV7SC5JWSrp8D/XOlxTty8ZKmihpq6TFyevarLpHSVqS9PkTSZ2tbW5mZkVStIWcJKWBq4HTyKw/vkjSnIhY3qHeMOCbwJMdung5IqZ30vU1wCwyS83OBeqBeQUO38zMdqOYZxwzgJURsSoimsmsHX5OJ/W+B/w7sK2rDiVVA/tFxOMREcBtwLkFjNnMzLpQzMQxFng9a7spKdtJ0keB8RFxbyftJ0n6o6RHJH0yq8+mPfVpZmbFVcw1xzu79hA7d0op4EfAJZ3UWwNMiIgNko4CfiNpald97vLh0iwyQ1pMmDAhv8jNzGy3innG0QSMz9oeB6zO2h4G1AALJb0KHAPMkVQXEdsjYgNARDwNvAwcmvQ5bg997hQR10dEXUTUVVZWFuiQzMysmIljETBZ0iRJFcCFwJz2nRHxTkSMioiJETGRzMXusyOiUVJlcnEdSQcDk4FVEbEG2CzpmORuqi8A9xTxGMzMrIOiDVVFRIukS4EFQBq4KSKWSboKaIyIOXtofgJwlaQWoBX4ekRsTPb9FXALMIjM3VS+oyoHU6v3L3UIZtZHFPMaBxExl8wts9llV+6m7olZ738F/Go39RrJDHGZmVkJFDVxWA/yxftKHYGZ9RGecsTMzPLixGFmZnlx4jAzs7w4cZiZWV6UmfKpb6urq4vGxsZSh2Fm1qtIejoi6jqW+4zDzMzy4sRhZmZ5ceIwM7O8OHGYmVlenDjMzCwvThxmZpYXJw4zM8uLE4eZmeXFicPMzPLSL54cl7QeeK2bzUcBbxUwnFLqK8fSV44DfCw9VV85lr09joMi4kNrb/eLxLE3JDV29sh9b9RXjqWvHAf4WHqqvnIsxToOD1WZmVlenDjMzCwvThxdu77UARRQXzmWvnIc4GPpqfrKsRTlOHyNw8zM8uIzDjMzy4sTRw4k/Yek5yU9J+luScNLHVN3SfqMpGWS2iT1urtGJNVLekHSSkmXlzqe7pJ0k6R1kpaWOpa9JWm8pIclrUj+3/rrUsfUHZIGSnpK0rPJcXy31DHtLUlp
SX+UdG8h+3XiyM0DQE1EHAm8CFxR4nj2xlLg08CjpQ4kX5LSwNXATGAKcJGkKaWNqttuAepLHUSBtAB/FxFHAMcA3+il/122AydHxDRgOlAv6ZgSx7S3/hpYUehOnThyEBH3R0RLsvkEMK6U8eyNiFgRES+UOo5umgGsjIhVEdEMzAbOKXFM3RIRjwIbSx1HIUTEmoh4Jnm/mcwX1djSRpW/yHgv2SxPXr32IrCkccCZwI2F7tuJI39fAuaVOoh+aizwetZ2E73wC6ovkzQR+CjwZGkj6Z5kaGcxsA54ICJ65XEkfgz8A9BW6I7LCt1hbyXpQaCqk13/GBH3JHX+kcxp+e37MrZ85XIsvZQ6Keu1fxH2NZKGAr8C/iYi3i11PN0REa3A9OQ65t2SaiKi112HknQWsC4inpZ0YqH7d+JIRMSpe9ov6WLgLOCU6OH3MHd1LL1YEzA+a3scsLpEsVgWSeVkksbtEfHrUseztyJik6SFZK5D9brEARwHnC2pARgI7CfpZxHxl4Xo3ENVOZBUD3wbODsitpQ6nn5sETBZ0iRJFcCFwJwSx9TvSRLwU2BFRPyvUsfTXZIq2++YlDQIOBV4vrRRdU9EXBER4yJiIpnfk98VKmmAE0eu/g8wDHhA0mJJ15Y6oO6S9ClJTcCxwH2SFpQ6plwlNyhcCiwgcwH2zohYVtqoukfSL4DHgcMkNUn6cqlj2gvHAZ8HTk5+PxYnf+n2NtXAw5KeI/NHygMRUdDbWPsKPzluZmZ58RmHmZnlxYnDzMzy4sRhZmZ5ceIwM7O8OHGYmVlenDjMCkDSe13X2mP7uyQdnLwfKuk6SS8ns7Q+KuloSRXJez+4ayXlxGFWYpKmAumIWJUU3UhmAsTJETEVuAQYlUzs+BBwQUkCNUs4cZgVkDL+Q9JSSUskXZCUpyT93+QM4l5JcyWdnzT7HNA+H9ohwNHAP0VEG0AyG/B9Sd3fJPXNSsanvGaF9WkyazlMA0YBiyQ9Subp6olALTCazJPvNyVtjgN+kbyfCixOJtvrzFLg40WJ3CxHPuMwK6zjgV9ERGtEvAk8QuaL/njglxHRFhFrgYez2lQD63PpPEkozZKGFThus5w5cZgVVmdTv++pHGArmRlMAZYB0yTt6XdzALCtG7GZFYQTh1lhPQpckCwIVAmcADwF/AE4L7nWMQY4MavNCuAjABHxMtAIfDeZdRZJkyWdk7wfCayPiB376oDMOnLiMCusu4HngGeB3wH/kAxN/YrMeiJLgevIrJD3TtLmPnZNJF8hsxDXSklLgBv4YN2Rk4C5xT0Esz3z7Lhm+4ikoRHxXnLW8BRwXESsTdZ+eDjZ3t1F8fY+fg1c0YvXjbc+wHdVme079yYLBVUA30vORIiIrZK+Q2b99D/trnGyeNVvnDSs1HzGYWZmefE1DjMzy4sTh5mZ5cWJw8zM8uLEYWZmeXHiMDOzvDhxmJlZXv4fwv6k54KD570AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot results\n",
    "n_Cs = len(Cs)\n",
    "number_penaltys = len(penaltys)\n",
    "test_scores = np.array(test_means).reshape(n_Cs,number_penaltys)\n",
    "train_scores = np.array(train_means).reshape(n_Cs,number_penaltys)\n",
    "test_stds = np.array(test_stds).reshape(n_Cs,number_penaltys)\n",
    "train_stds = np.array(train_stds).reshape(n_Cs,number_penaltys)\n",
    "\n",
    "\n",
    "x_axis = np.log10(Cs)\n",
    "for i, value in enumerate(penaltys):\n",
    "    #pyplot.plot(log(Cs), test_scores[i], label= 'penalty:'   + str(value))\n",
    "    plt.errorbar(x_axis, -test_scores[:,i], yerr=test_stds[:,i] ,label = penaltys[i] +' Test')\n",
    "    #plt.errorbar(x_axis, -train_scores[:,i], yerr=train_stds[:,i] ,label = penaltys[i] +' Train')\n",
    "    \n",
    "plt.legend()\n",
    "plt.xlabel( 'log(C)' )                                                                                                      \n",
    "plt.ylabel( 'logloss' )\n",
    "plt.savefig('C:/Users/14916/Desktop/s/LogisticGridSearchCV_C.png' )\n",
    "\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "上图给出了L1正则和L2正则下、不同正则参数C对应的模型在训练集和测试集上的logloss。\n",
    "交叉熵损失系数C：可以看出C越大（正则越少）的模型在训练集上性能越好。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "# Persist the best estimator found by GridSearchCV.\n",
    "# Use a context manager so the file handle is closed even if dump() raises.\n",
    "with open(\"C:/Users/14916/Desktop/s/FE_pima-indians-diabetes\", 'wb') as f:\n",
    "    pickle.dump(grid.best_estimator_, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 选用saga优化器时的超参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saga_lr_lr best_score= 0.47601923510002403\n",
      "saga_lr_lr best_params= {'C': 1, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "saga_lr= LogisticRegression(solver='saga')\n",
    "grid= GridSearchCV(saga_lr, tuned_parameters,cv=5, scoring='neg_log_loss',n_jobs = 4,)\n",
    "grid.fit(X_train,y_train)\n",
    "print('saga_lr_lr best_score=',-grid.best_score_)\n",
    "print('saga_lr_lr best_params=',grid.best_params_)\n",
    "saga_lr_lr_best_estimator_grid=grid.best_estimator_#solver='saga',cv=5, scoring='neg_log_loss'时网格搜索到的最优模型：C=1, penalty=l1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "通过gridsearch网格搜索可知，最合理的惩罚函数为L1，优化器为liblinear，最优交叉熵损失函数系数应该在1附近"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scores= 0.48038812679038145\n",
      "C= [0.2]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegressionCV\n",
    "\n",
    "Cs = [ 0.04, 0.2, 1, 5, 25]\n",
    "lr_cv= LogisticRegressionCV(Cs=Cs,cv=5,penalty='l1',scoring='neg_log_loss',solver='liblinear')\n",
    "lr_cv.fit(X_train,y_train)\n",
    "print('scores=',-lr_cv.scores_[1].mean())\n",
    "print('C=',lr_cv.C_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "通过5折逻辑回归交叉验证，将C的搜索范围缩小到1附近后，发现分数进一步提升（约0.48），最优交叉熵损失函数系数C可能在0.2附近"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scores= 0.4964501516906202\n",
      "C= [0.2]\n"
     ]
    }
   ],
   "source": [
    "Cs = [ 0.032, 0.08, 0.2, 0.5, 1.25]\n",
    "lr_cv= LogisticRegressionCV(Cs=Cs,cv=5,penalty='l1',scoring='neg_log_loss',solver='liblinear')\n",
    "lr_cv.fit(X_train,y_train)\n",
    "print('scores=',-lr_cv.scores_[1].mean())\n",
    "print('C=',lr_cv.C_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "分数有增大，说明参数c为0.2不是最好的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.5811388300841898 [0.12649110640673517, 0.2, 0.316227766016838, 0.5, 0.7905694150420949]\n",
      "scores= 0.48799975891709624\n",
      "C= [0.2]\n"
     ]
    }
   ],
   "source": [
    "#最优交叉熵损失函数系数可能在0.2和0.5之间\n",
    "d=(0.5/0.2)**0.5\n",
    "Cs = [ 0.2/d, 0.2, 0.2*d, 0.5, 0.5*d]\n",
    "print(d,Cs)\n",
    "lr_cv= LogisticRegressionCV(Cs=Cs,cv=5,penalty='l1',scoring='neg_log_loss',solver='liblinear')\n",
    "lr_cv.fit(X_train,y_train)\n",
    "print('scores=',-lr_cv.scores_[1].mean())\n",
    "print('C=',lr_cv.C_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Cs=[0.5,0.2,1,0.31622777]，得分最高的C=0.5(scores= 0.4831590595663245)，所以最优的Logistic回归模型的正则超参数C为0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "当penalty='l1',C=0.5,solver='liblinear'时，accuracy_score= 0.7792207792207793 ,log_loss= 7.625485676664965\n"
     ]
    }
   ],
   "source": [
    "# Split off a 10% hold-out test set from the training data\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "# NOTE(review): this overwrites X_train/y_train in place, so re-running this\n",
    "# cell repeatedly shrinks the training set; only valid under Restart & Run All.\n",
    "X_train, X_test, y_train, y_true = train_test_split(X_train, y_train, random_state=123, test_size=0.1)\n",
    "\n",
    "lr= LogisticRegression(penalty='l1',C=0.5,solver='liblinear').fit(X_train, y_train)\n",
    "y_pred=lr.predict(X_test)\n",
    "\n",
    "accuracy_score_my=accuracy_score(y_true, y_pred)\n",
    "# log_loss must be computed from predicted probabilities; passing hard 0/1\n",
    "# labels degenerates it into a penalty on the error rate (hence values ~7.6).\n",
    "log_loss_my=log_loss(y_true, lr.predict_proba(X_test))\n",
    "print(\"当penalty='l1',C=0.5,solver='liblinear'时，accuracy_score=\",accuracy_score_my,\",log_loss=\",log_loss_my)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
       "                   intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
       "                   multi_class='warn', n_jobs=None, penalty='l1',\n",
       "                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,\n",
       "                   warm_start=False)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "liblinear_lr_best_estimator_grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "当penalty='l1',C=1,solver='liblinear'时，accuracy_score= 0.7922077922077922 ,log_loss= 7.176930139068723\n"
     ]
    }
   ],
   "source": [
    "y_pred=liblinear_lr_best_estimator_grid.predict(X_test)\n",
    "accuracy_score_1=accuracy_score(y_true, y_pred)\n",
    "# use predict_proba for a proper log-loss, not the hard label predictions\n",
    "log_loss_1=log_loss(y_true, liblinear_lr_best_estimator_grid.predict_proba(X_test))\n",
    "print(\"当penalty='l1',C=1,solver='liblinear'时，accuracy_score=\",accuracy_score_1,\",log_loss=\",log_loss_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
       "                   intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
       "                   multi_class='warn', n_jobs=None, penalty='l1',\n",
       "                   random_state=None, solver='saga', tol=0.0001, verbose=0,\n",
       "                   warm_start=False)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "saga_lr_lr_best_estimator_grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "当penalty='l1',C=1,solver='saga'时，accuracy_score= 0.7922077922077922 ,log_loss= 7.176930139068723\n"
     ]
    }
   ],
   "source": [
    "y_pred=saga_lr_lr_best_estimator_grid.predict(X_test)\n",
    "accuracy_score_2=accuracy_score(y_true, y_pred)\n",
    "# use predict_proba for a proper log-loss, not the hard label predictions\n",
    "log_loss_2=log_loss(y_true, saga_lr_lr_best_estimator_grid.predict_proba(X_test))\n",
    "print(\"当penalty='l1',C=1,solver='saga'时，accuracy_score=\",accuracy_score_2,\",log_loss=\",log_loss_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "发现：当其他参数相同时，solver不影响模型评分，如：solver='saga'或'liblinear'时，accuracy_score= 0.7922077922077922 ,log_loss= 7.176930139068723"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "结论：用GridSearchCV自动查找的模型参数C=1，比自己手动调的C=0.5，性能更高：最佳得分为accuracy_score= 0.7922077922077922 ,log_loss= 7.176930139068723"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "在已找到C=1的最佳正则超参数前提下，系数为："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy_score= 0.7792207792207793 ,log_loss= 7.625485676664965\n",
      "coef_:\n",
      " [[ 0.3321078   1.08889415 -0.05692123  0.0548324  -0.09840344  0.56952022\n",
      "   0.30708682  0.20023999]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\python\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "# Specify the solver explicitly: l1 penalty requires 'liblinear' (or 'saga');\n",
    "# this also silences the FutureWarning about the changing default solver.\n",
    "lr= LogisticRegression(penalty='l1',tol=0.00000001,C=1,random_state=456,solver='liblinear').fit(X_train, y_train)\n",
    "y_pred=lr.predict(X_test)\n",
    "accuracyScore=accuracy_score(y_true, y_pred)\n",
    "# log_loss needs probability estimates, not hard class labels\n",
    "logLoss=log_loss(y_true, lr.predict_proba(X_test))\n",
    "print(\"accuracy_score=\",accuracyScore,\",log_loss=\",logLoss)\n",
    "# fixed garbled escape: was '\\：n' (fullwidth colon), intended '\\n'\n",
    "print('coef_:\\n',lr.coef_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "最佳得分为0.77922，交叉熵损失为7.625，并且\n",
    "在已找到C=1的最佳正则超参数前提下，最大的系数为1.09（第一个特征的系数为0.33）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
